Merge pull request #611 from chriseidhof/pdf-info

Added a PDF info agent

Andrew Cantino 10 years ago
parent
commit
8824d8d65e
4 changed files with 99 additions and 0 deletions
  1. 1 0
      Gemfile
  2. 6 0
      Gemfile.lock
  3. 65 0
      app/models/agents/pdf_info_agent.rb
  4. 27 0
      spec/models/agents/pdf_agent_spec.rb

+ 1 - 0
Gemfile

@@ -13,6 +13,7 @@ gem 'hipchat', '~> 1.2.0'         # HipchatAgent
13 13
 gem 'xmpp4r',  '~> 0.5.6'         # JabberAgent
14 14
 gem 'mqtt'                        # MQTTAgent
15 15
 gem 'slack-notifier', '~> 1.0.0'  # SlackAgent
16
+gem 'hypdf', '~> 1.0.7'           # PDFInfoAgent
16 17
 
17 18
 # GoogleCalendarPublishAgent
18 19
 gem "google-api-client", require: 'google/api_client'

+ 6 - 0
Gemfile.lock

@@ -174,12 +174,17 @@ GEM
174 174
     hipchat (1.2.0)
175 175
       httparty
176 176
     hpricot (0.8.6)
177
+    httmultiparty (0.3.10)
178
+      httparty (>= 0.7.3)
179
+      multipart-post
177 180
     http (0.5.1)
178 181
       http_parser.rb
179 182
     http_parser.rb (0.6.0)
180 183
     httparty (0.13.1)
181 184
       json (~> 1.8)
182 185
       multi_xml (>= 0.5.2)
186
+    hypdf (1.0.7)
187
+      httmultiparty (= 0.3.10)
183 188
     i18n (0.6.11)
184 189
     jquery-rails (3.1.1)
185 190
       railties (>= 3.0, < 5.0)
@@ -475,6 +480,7 @@ DEPENDENCIES
475 480
   guard-rspec
476 481
   hipchat (~> 1.2.0)
477 482
   httparty (~> 0.13)
483
+  hypdf (~> 1.0.7)
478 484
   jquery-rails (~> 3.1.0)
479 485
   json (~> 1.8.1)
480 486
   jsonpath (~> 0.5.6)

+ 65 - 0
app/models/agents/pdf_info_agent.rb

@@ -0,0 +1,65 @@
1
+require 'open-uri'
2
+require 'hypdf'
3
+
4
+module Agents
5
+  class PdfInfoAgent < Agent
6
+
7
+    gem_dependency_check { defined?(HyPDF) }
8
+
9
+    cannot_be_scheduled!
10
+
11
+    description <<-MD
12
+      #{'## Include the `hypdf` gem in your `Gemfile` to use PDFInfo Agents.' if dependencies_missing?}
13
+
14
+      In order for this agent to work, you need to have [HyPDF](https://devcenter.heroku.com/articles/hypdf) running and configured.
15
+
16
+      It works by acting on events that contain a key `url` in their payload, and runs the [pdfinfo](https://devcenter.heroku.com/articles/hypdf#pdfinfo) command on them.
17
+    MD
18
+
19
+    event_description <<-MD
20
+    This will change based on the metadata in the pdf.
21
+
22
+      { "Title"=>"Everyday Rails Testing with RSpec", 
23
+        "Author"=>"Aaron Sumner",
24
+        "Creator"=>"LaTeX with hyperref package",
25
+        "Producer"=>"xdvipdfmx (0.7.8)",
26
+        "CreationDate"=>"Fri Aug  2 05",
27
+        "32"=>"50 2013",
28
+        "Tagged"=>"no",
29
+        "Pages"=>"150",
30
+        "Encrypted"=>"no",
31
+        "Page size"=>"612 x 792 pts (letter)",
32
+        "Optimized"=>"no",
33
+        "PDF version"=>"1.5",
34
+        "url": "your url"
35
+      }
36
+    MD
37
+
38
+    def working?
39
+      !recent_error_logs?
40
+    end
41
+
42
+    def default_options
43
+      {}
44
+    end
45
+
46
+    def receive(incoming_events)
47
+      incoming_events.each do |event|
48
+        interpolate_with(event) do
49
+          url_to_scrape = event.payload['url']
50
+          check_url(url_to_scrape, event.payload) if url_to_scrape =~ /^https?:\/\//i
51
+        end
52
+      end
53
+    end
54
+
55
+    def check_url(in_url, payload)
56
+      return unless in_url.present?
57
+      Array(in_url).each do |url|
58
+        log "Fetching #{url}"
59
+        info = HyPDF.pdfinfo(open(url))
60
+        create_event :payload => info.merge(payload)
61
+      end
62
+    end
63
+
64
+  end
65
+end

+ 27 - 0
spec/models/agents/pdf_agent_spec.rb

@@ -0,0 +1,27 @@
1
+require 'spec_helper'
2
+
3
+describe Agents::PdfInfoAgent do
4
+  let(:agent) do
5
+    _agent = Agents::PdfInfoAgent.new(name: "PDF Info Agent")
6
+    _agent.user = users(:bob)
7
+    _agent.sources << agents(:bob_website_agent)
8
+    _agent.save!
9
+    _agent
10
+  end
11
+
12
+  describe "#receive" do
13
+    before do
14
+      @event = Event.new(payload: {'url' => 'http://mypdf.com'})
15
+    end
16
+
17
+    it "should call HyPDF" do
18
+      expect {
19
+        mock(agent).open('http://mypdf.com') { "data" }
20
+        mock(HyPDF).pdfinfo('data') { {title: "Huginn"} }
21
+        agent.receive([@event])
22
+      }.to change { Event.count }.by(1)
23
+      event = Event.last
24
+      expect(event.payload[:title]).to eq('Huginn')
25
+    end
26
+  end
27
+end